InterCD 2001 May

home *** CD-ROM | disk | FTP | other *** search

/ InterCD 2001 May / may_2001.iso / intercd / root / Multimedia / ^DivX_Article / virtualdub / VirtualDub-source-1_4d / mpeg_decode.cpp < prev next >

Wrap

C/C++ Source or Header | 2001-03-26 | 69.6 KB | 2,765 lines

// VirtualDub - Video processing and capture application // Copyright (C) 1998-2001 Avery Lee // // This program is free software; you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation; either version 2 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program; if not, write to the Free Software // Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. // /////////////////////////////////////////////////////////////////////////// // // WARNING // // This code is heavily based off of the Java MPEG video player written by // Joerg Anders. Because his code was released under the GNU GPL v2, this // means VirtualDub must also be released under GNU GPL v2 when MPEG // support is included. // // (Like that's any different.) // // This code is really nasty... 
// /////////////////////////////////////////////////////////////////////////// #include <stdio.h> #include <process.h> #include <crtdbg.h> #include <assert.h> #include <windows.h> #include <math.h> #include "CMemoryBitInput.h" #include "Error.h" #include "cpuaccel.h" #include "mpeg_idct.h" #include "mpeg_tables.h" #include "mpeg_decode.h" ////////////////////////////////////////////////////////////// #ifdef _DEBUG //#define STATISTICS #endif #define DCT_POSITION_CHECKING //#define MB_STATS //#define MB_SPLIT_STATS //#define TIME_TRIALS //#define NO_DECODING //#define DISPLAY_INTER_COUNT ////////////////////////////////////////////////////////////// #define VIDPKT_TYPE_PICTURE_START (0x00) #define VIDPKT_TYPE_SLICE_START_MIN (0x01) #define VIDPKT_TYPE_SLICE_START_MAX (0xaf) #define MIDVAL (262144) #define ROUNDVAL (1024) #ifdef _DEBUG class BranchPredictor { private: int v[16]; int ls; int taken; int mispredict; int total; const char *s; public: BranchPredictor(const char *_s); ~BranchPredictor(); bool predict(bool b); }; BranchPredictor::BranchPredictor(const char *_s) { memset(v, 0, sizeof v); ls = 0; taken = 0; mispredict = 0; total = 0; s = _s; } BranchPredictor::~BranchPredictor() { char buf[256]; sprintf(buf, "Branch predictor \"%s\": %d branches (%d%% taken), %d mispredicts (%d%%)\n" ,s ,total ,MulDiv(taken, 100, total) ,mispredict ,MulDiv(mispredict, 100, total)); _RPT0(0,buf); } bool BranchPredictor::predict(bool b) { if (b) { if (v[ls]<2) ++mispredict; if (v[ls]<3) ++v[ls]; ++taken; } else { if (v[ls]>=2) ++mispredict; if (v[ls]) --v[ls]; } ls = ((ls<<1)|(int)b) & 15; ++total; return b; } BranchPredictor g_predict080(">=080 (90%)"); BranchPredictor g_predict600("<600 (40%)"); BranchPredictor g_predictC00("C00-FFF (20%)"); BranchPredictor g_predict800("800-BFF"); BranchPredictor g_predict040("040"); BranchPredictor g_predict020("020"); #define PREDICT(ths, v) (g_predict##ths.predict(v)) #else #define PREDICT(ths, v) (v) #endif 
////////////////////////////////////////////////////////////// typedef unsigned char YUVPixel; static void video_process_picture_start_packet(char *ptr); static void video_process_picture_slice(char *ptr, int type); static void YUVToRGB32(YUVPixel *Y_ptr, YUVPixel *U_ptr, YUVPixel *V_ptr, unsigned char *dst, long bpr, long w, long h); static void YUVToRGB24(YUVPixel *Y_ptr, YUVPixel *U_ptr, YUVPixel *V_ptr, unsigned char *dst, long bpr, long w, long h); static void YUVToRGB16(YUVPixel *Y_ptr, YUVPixel *U_ptr, YUVPixel *V_ptr, unsigned char *dst, long bpr, long w, long h); static void YUVToUYVY16(YUVPixel *Y_ptr, YUVPixel *U_ptr, YUVPixel *V_ptr, unsigned char *dst, long bpr, long w, long h); static void YUVToYUY216(YUVPixel *Y_ptr, YUVPixel *U_ptr, YUVPixel *V_ptr, unsigned char *dst, long bpr, long w, long h); static void YUVToYUV12(YUVPixel *Y_ptr, YUVPixel *U_ptr, YUVPixel *V_ptr, unsigned char *dst, long bpr, long w, long h); static void video_copy_forward(int x_pos, int y_pos); static void video_copy_forward_prediction(int x_pos, int y_pos, bool); static void video_copy_backward_prediction(int x_pos, int y_pos, bool); static void video_add_backward_prediction(int x_pos, int y_pos, bool); /////////////////////////////////////////////////////////////////////////////// static const int zigzag[] = { // the reverse zigzag scan order 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63, }; static const int zigzag_MMX[] = { 0, 2, 8, 16, 10, 4, 6, 12, 18, 24, 32, 26, 20, 14, 1, 3, 9, 22, 28, 34, 40, 48, 42, 36, 30, 17, 11, 5, 7, 13, 19, 25, 38, 44, 50, 56, 58, 52, 46, 33, 27, 21, 15, 23, 29, 35, 41, 54, 60, 62, 49, 43, 37, 31, 39, 45, 51, 57, 59, 53, 47, 55, 61, 63, }; static const int intramatrix_default[64] = { // the default intramatrix 8, 16, 19, 22, 26, 27, 29, 34, 16, 16, 
22, 24, 27, 29, 34, 37, 19, 22, 26, 27, 29, 34, 34, 38, 22, 22, 26, 27, 29, 34, 37, 40, 22, 26, 27, 29, 32, 35, 40, 48, 26, 27, 29, 32, 35, 40, 48, 58, 26, 27, 29, 34, 38, 46, 56, 69, 27, 29, 35, 38, 46, 56, 69, 83}; static char *memblock = NULL; static int intramatrix0[64]; static int nonintramatrix0[64]; static int intramatrices[32][64]; static int nonintramatrices[32][64]; static struct MPEGBuffer { YUVPixel *Y; YUVPixel *U; YUVPixel *V; int frame_num; } buffers[3]; #define I_FRAME (0x1) #define P_FRAME (0x2) #define B_FRAME (0x3) #define D_FRAME (0x4) static int frame_type; static YUVPixel *Y_back, *Y_forw, *Y_dest; static YUVPixel *U_back, *U_forw, *U_dest; static YUVPixel *V_back, *V_forw, *V_dest; static long pelWidth, pelHeight, mbWidth, mbHeight; static BOOL reset_flag; static long y_pitch, uv_pitch; static long y_modulo, uv_modulo; extern "C" const unsigned long YUV_Y_table[], YUV_U_table[], YUV_V_table[]; extern "C" const unsigned char YUV_clip_table[]; #ifdef STATISTICS struct { int coded_block_pattern; } stats; #endif #ifdef TIME_TRIALS struct { int counts[4]; __int64 cycles[4]; __int64 totalcycles; int totalframes; } timetrials; #endif static int vector_limit_x; static int vector_limit_y; static enum { MPEG_NOT_READY, MPEG_READY_SCALAR, MPEG_READY_MMX } mpeg_ready_state; extern "C" void video_copy_prediction_Y_ISSE(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); extern "C" void video_copy_prediction_C_ISSE(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); extern "C" void video_add_prediction_Y_ISSE(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); extern "C" void video_add_prediction_C_ISSE(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); extern "C" void video_copy_prediction_Y_scalar(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); extern "C" void video_copy_prediction_C_scalar(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); extern "C" void video_add_prediction_Y_scalar(YUVPixel *src, 
YUVPixel *dst, int vx, int vy, long pitch); extern "C" void video_add_prediction_C_scalar(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); extern "C" void video_copy_prediction_Y_MMX(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); extern "C" void video_copy_prediction_C_MMX(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); extern "C" void video_add_prediction_Y_MMX(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); extern "C" void video_add_prediction_C_MMX(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); static void (*video_copy_prediction_Y)(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); static void (*video_copy_prediction_C)(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); static void (*video_add_prediction_Y)(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); static void (*video_add_prediction_C)(YUVPixel *src, YUVPixel *dst, int vx, int vy, long pitch); //int lpos_stats[64]; //////////////////////////////////////////////////////////////////////////////// void mpeg_deinitialize() { // if (memblock) { freemem(memblock); memblock = NULL; } if (memblock) { VirtualFree(memblock, 0, MEM_RELEASE); memblock = NULL; } } void mpeg_initialize(int width, int height, char *imatrix, char *nimatrix, BOOL fullpel_only) { mpeg_deinitialize(); try { int i; pelWidth = width; pelHeight = height; mbWidth = (width+15)/16; mbHeight = (height+15)/16; y_pitch = mbWidth * 16; uv_pitch = mbWidth * 8; y_modulo = 15*16*mbWidth; uv_modulo = 7*8*mbWidth; vector_limit_x = (mbWidth-1) * 32; vector_limit_y = (mbHeight-1) * 32; // if (!(memblock = (char *)allocmem(32 + mbWidth * mbHeight * (3*256*sizeof(YUVPixel) + 6*64*sizeof(YUVPixel)) + (uv_pitch+8)*8))) // throw MyMemoryError(); if (!(memblock = (char *)VirtualAlloc(NULL, mbWidth * mbHeight * (3*256*sizeof(YUVPixel) + 6*64*sizeof(YUVPixel)) + (32)*8, MEM_COMMIT, PAGE_READWRITE))) throw MyMemoryError(); memset(memblock, 0, mbWidth * mbHeight * 
(3*256*sizeof(YUVPixel) + 6*64*sizeof(YUVPixel)) + (32)*8); buffers[0].Y = (YUVPixel *)((char *)memblock ); //+ ((32-(long)memblock)&31)); buffers[1].Y = (YUVPixel *)((char *)buffers[0].Y + mbWidth * mbHeight * 256 * sizeof(YUVPixel) + 32); buffers[2].Y = (YUVPixel *)((char *)buffers[1].Y + mbWidth * mbHeight * 256 * sizeof(YUVPixel) + 32); buffers[0].U = (YUVPixel *)((char *)buffers[2].Y + mbWidth * mbHeight * 256 * sizeof(YUVPixel) + 32); buffers[1].U = (YUVPixel *)((char *)buffers[0].U + mbWidth * mbHeight * 64 * sizeof(YUVPixel) + 32); buffers[2].U = (YUVPixel *)((char *)buffers[1].U + mbWidth * mbHeight * 64 * sizeof(YUVPixel) + 32); buffers[0].V = (YUVPixel *)((char *)buffers[2].U + mbWidth * mbHeight * 64 * sizeof(YUVPixel) + 32); buffers[1].V = (YUVPixel *)((char *)buffers[0].V + mbWidth * mbHeight * 64 * sizeof(YUVPixel) + 32); buffers[2].V = (YUVPixel *)((char *)buffers[1].V + mbWidth * mbHeight * 64 * sizeof(YUVPixel) + 32); buffers[0].frame_num = buffers[1].frame_num = buffers[2].frame_num = -1; if (imatrix) for(i=0; i<64; i++) intramatrix0[zigzag[i]] = (unsigned char)imatrix[i]; else memcpy(intramatrix0, intramatrix_default, 64*sizeof(int)); if (nimatrix) for(i=0; i<64; i++) nonintramatrix0[zigzag[i]] = (unsigned char)nimatrix[i]; else for(i=0; i<64; i++) nonintramatrix0[i] = 16; IDCT_init(); mpeg_reset(); } catch(MyError e) { mpeg_deinitialize(); throw e; } } void mpeg_reset() { // for(int i=0; i<64; i++) // _RPT2(0,"%d: %d\n", i, lpos_stats[i]); reset_flag = TRUE; mpeg_ready_state = MPEG_NOT_READY; } void mpeg_convert_frame32(void *output_buffer, int buffer_ID) { // _RPT1(0,"MPEG: converting frame buffer %d\b", buffer_ID); #ifdef NO_DECODING return; #endif #ifdef _DEBUG if (buffer_ID == -1) throw MyError("Invalid source buffer in "__FILE__", line %d",__LINE__); #endif // memset(buffers[buffer_ID].Y, 0x80, y_pitch*mbHeight*16); YUVToRGB32(buffers[buffer_ID].Y, buffers[buffer_ID].U, buffers[buffer_ID].V, (unsigned char *)output_buffer, (mbWidth*16)*4, 
(pelWidth+1)>>1, (pelHeight+1)>>1); } void mpeg_convert_frame24(void *output_buffer, int buffer_ID) { // _RPT1(0,"MPEG: converting frame buffer %d\b", buffer_ID); #ifdef NO_DECODING return; #endif #ifdef _DEBUG if (buffer_ID == -1) throw MyError("Invalid source buffer in "__FILE__", line %d",__LINE__); #endif // memset(buffers[buffer_ID].Y, 0x80, y_pitch*mbHeight*16); YUVToRGB24(buffers[buffer_ID].Y, buffers[buffer_ID].U, buffers[buffer_ID].V, (unsigned char *)output_buffer, (mbWidth*16)*3, ((pelWidth+7)&-8)>>1, (pelHeight+1)>>1); } void mpeg_convert_frame16(void *output_buffer, int buffer_ID) { // _RPT1(0,"MPEG: converting frame buffer %d\b", buffer_ID); #ifdef NO_DECODING return; #endif #ifdef _DEBUG if (buffer_ID == -1) throw MyError("Invalid source buffer in "__FILE__", line %d",__LINE__); #endif // memset(buffers[buffer_ID].Y, 0x80, y_pitch*mbHeight*16); YUVToRGB16(buffers[buffer_ID].Y, buffers[buffer_ID].U, buffers[buffer_ID].V, (unsigned char *)output_buffer, (mbWidth*16)*2, ((pelWidth+7)&-8)>>1, (pelHeight+1)>>1); } void mpeg_convert_frameUYVY16(void *output_buffer, int buffer_ID) { // _RPT1(0,"MPEG: converting frame buffer %d\b", buffer_ID); #ifdef NO_DECODING return; #endif #ifdef _DEBUG if (buffer_ID == -1) throw MyError("Invalid source buffer in "__FILE__", line %d",__LINE__); #endif YUVToUYVY16(buffers[buffer_ID].Y, buffers[buffer_ID].U, buffers[buffer_ID].V, (unsigned char *)output_buffer, (mbWidth*16)*2, (pelWidth+1)>>1, (pelHeight+1)>>1); } void mpeg_convert_frameYUY216(void *output_buffer, int buffer_ID) { // _RPT1(0,"MPEG: converting frame buffer %d\b", buffer_ID); #ifdef NO_DECODING return; #endif #ifdef _DEBUG if (buffer_ID == -1) throw MyError("Invalid source buffer in "__FILE__", line %d",__LINE__); #endif YUVToYUY216(buffers[buffer_ID].Y, buffers[buffer_ID].U, buffers[buffer_ID].V, (unsigned char *)output_buffer, (mbWidth*16)*2, (pelWidth+1)>>1, (pelHeight+1)>>1); } void mpeg_decode_frame(void *input_data, int len, int frame_num) { #ifdef 
NO_DECODING return; #endif char *ptr = (char *)input_data; char *limit = ptr + len - 4; int type; if (MMX_enabled) { if (ISSE_enabled) { video_copy_prediction_Y = video_copy_prediction_Y_ISSE; video_add_prediction_Y = video_add_prediction_Y_ISSE; video_copy_prediction_C = video_copy_prediction_C_ISSE; video_add_prediction_C = video_add_prediction_C_ISSE; } else { video_copy_prediction_Y = video_copy_prediction_Y_MMX; video_add_prediction_Y = video_add_prediction_Y_MMX; video_copy_prediction_C = video_copy_prediction_C_MMX; video_add_prediction_C = video_add_prediction_C_MMX; } } else { video_copy_prediction_Y = video_copy_prediction_Y_scalar; video_add_prediction_Y = video_add_prediction_Y_scalar; video_copy_prediction_C = video_copy_prediction_C_scalar; video_add_prediction_C = video_add_prediction_C_scalar; } if (MMX_enabled && mpeg_ready_state != MPEG_READY_MMX) { int i,j; for(j=0; j<32; j++) { for(i=0; i<64; i+=8) { nonintramatrices[j][i+0] = nonintramatrix0[i+0] * j; intramatrices [j][i+0] = intramatrix0 [i+0] * j; nonintramatrices[j][i+2] = nonintramatrix0[i+1] * j; intramatrices [j][i+2] = intramatrix0 [i+1] * j; nonintramatrices[j][i+4] = nonintramatrix0[i+2] * j; intramatrices [j][i+4] = intramatrix0 [i+2] * j; nonintramatrices[j][i+6] = nonintramatrix0[i+3] * j; intramatrices [j][i+6] = intramatrix0 [i+3] * j; nonintramatrices[j][i+1] = nonintramatrix0[i+4] * j; intramatrices [j][i+1] = intramatrix0 [i+4] * j; nonintramatrices[j][i+3] = nonintramatrix0[i+5] * j; intramatrices [j][i+3] = intramatrix0 [i+5] * j; nonintramatrices[j][i+5] = nonintramatrix0[i+6] * j; intramatrices [j][i+5] = intramatrix0 [i+6] * j; nonintramatrices[j][i+7] = nonintramatrix0[i+7] * j; intramatrices [j][i+7] = intramatrix0 [i+7] * j; } } mpeg_ready_state = MPEG_READY_MMX; } else if (!MMX_enabled && mpeg_ready_state != MPEG_READY_SCALAR) { int i,j; for(j=0; j<32; j++) { for(i=0;i<64;i++) { nonintramatrices[j][i] = nonintramatrix0[i]*j; intramatrices[j][i] = intramatrix0[i]*j; } 
IDCT_norm(intramatrices[j]); IDCT_norm(nonintramatrices[j]); } mpeg_ready_state = MPEG_READY_SCALAR; } #ifdef STATISTICS memset(&stats, 0, sizeof stats); #endif #ifdef TIME_TRIALS __int64 time_start, time_end; __asm { rdtsc mov dword ptr time_start+0,eax mov dword ptr time_start+4,edx }; #endif frame_type = -1; ptr[len-4] = 0; ptr[len-3] = 0; ptr[len-2] = 1; ptr[len-1] = (char)0xff; while(ptr < limit) { do { if (ptr>limit) goto advance; while(*ptr++) if (ptr>limit) goto advance; } while(ptr[0] != 0 || ptr[1] != 1); type = ptr[2]; ptr += 3; // _RPT1(0,"Packet type %02x\n", type); switch(type) { case VIDPKT_TYPE_PICTURE_START: video_process_picture_start_packet(ptr); break; default: if (type >= VIDPKT_TYPE_SLICE_START_MIN && type <= VIDPKT_TYPE_SLICE_START_MAX) video_process_picture_slice(ptr, type); } } if (MMX_enabled) __asm emms #ifdef TIME_TRIALS __asm { rdtsc mov dword ptr time_end+0,eax mov dword ptr time_end+4,edx }; ++timetrials.counts[frame_type-1]; timetrials.cycles[frame_type-1] += (time_end - time_start); timetrials.totalcycles += (time_end - time_start); if (!(++timetrials.totalframes & 63)) { static char buf[256]; wsprintf(buf, "%d I-frames (%d, %d%%), %d P-frames (%d, %d%%), %d B-frames (%d, %d%%)\n" ,timetrials.counts[0] ,timetrials.counts[0] ? (int)(timetrials.cycles[0] / timetrials.counts[0]) : 0 ,timetrials.counts[0] ? (int)((timetrials.cycles[0]*100)/timetrials.totalcycles) : 0 ,timetrials.counts[1] ,timetrials.counts[1] ? (int)(timetrials.cycles[1] / timetrials.counts[1]) : 0 ,timetrials.counts[1] ? (int)((timetrials.cycles[1]*100)/timetrials.totalcycles) : 0 ,timetrials.counts[2] ,timetrials.counts[2] ? (int)(timetrials.cycles[2] / timetrials.counts[2]) : 0 ,timetrials.counts[2] ? 
(int)((timetrials.cycles[2]*100)/timetrials.totalcycles) : 0); OutputDebugString(buf); } #endif advance: #ifdef DISPLAY_INTER_COUNT memset(U_dest, 0x80, uv_pitch * mbHeight * 8); memset(V_dest, 0x80, uv_pitch * mbHeight * 8); #endif switch(frame_type) { case I_FRAME: mpeg_swap_buffers(MPEG_BUFFER_FORWARD, MPEG_BUFFER_BACKWARD); buffers[MPEG_BUFFER_FORWARD].frame_num = frame_num; reset_flag = FALSE; break; case P_FRAME: mpeg_swap_buffers(MPEG_BUFFER_FORWARD, MPEG_BUFFER_BACKWARD); buffers[MPEG_BUFFER_FORWARD].frame_num = frame_num; reset_flag = FALSE; break; case B_FRAME: buffers[MPEG_BUFFER_BIDIRECTIONAL].frame_num = frame_num; break; #ifdef _DEBUG default: throw MyError("Invalid frame type"); #endif }; #ifdef STATISTICS _RPT2(0,"--- Frame #%d statistics (%c-frame)\n", frame_num, " IPBD567"[frame_type]); _RPT2(0,"\tCoded block pattern: %7d/%d macroblocks\n", stats.coded_block_pattern, mbWidth*mbHeight); _RPT0(0,"\n"); #endif } void mpeg_swap_buffers(int buffer1, int buffer2) { MPEGBuffer b; b = buffers[buffer1]; buffers[buffer1] = buffers[buffer2]; buffers[buffer2] = b; } int mpeg_lookup_frame(int frame) { // _RPT4(0,"Looking for %ld (%ld/%ld/%ld)\n", frame, buffers[0].frame_num, buffers[1].frame_num, buffers[2].frame_num); for(int i=0; i<(sizeof buffers/sizeof buffers[0]); i++) if (buffers[i].frame_num == frame) return i; return -1; } //////////////////////////////////////////////////////////////////////////////////////// static int *intramatrix; static int *nonintramatrix; static int forw_vector_full_pel, forw_vector_bits; static int back_vector_full_pel, back_vector_bits; static int forw_vector_mask, back_vector_mask; static int forw_vector_extend, back_vector_extend; static int forw_vector_x, forw_vector_y; static int back_vector_x, back_vector_y; ///////////////////////////// static void mpeg_set_destination_buffer(int id) { Y_dest = buffers[id].Y; U_dest = buffers[id].U; V_dest = buffers[id].V; } static void video_process_picture_start_packet(char *ptr) { 
CMemoryBitInput bits(ptr); long temp_rf = bits.get(10); frame_type = bits.get(3); // _RPT2(0,"Processing %c-frame (#%d)\n", " IPBD567"[frame_type], temp_rf); switch(frame_type) { case I_FRAME: // I-frames have no prediction mpeg_set_destination_buffer(MPEG_BUFFER_BACKWARD); // _RPT1(0,"Processing I-frame (#%d)\n", temp_rf); break; case P_FRAME: // P-frames predict back to the last I or P mpeg_set_destination_buffer(MPEG_BUFFER_BACKWARD); // _RPT2(0,"Processing P-frame (#%d) (forward: %ld)\n", temp_rf, buffers[MPEG_BUFFER_FORWARD].frame_num); Y_forw = buffers[MPEG_BUFFER_FORWARD].Y; U_forw = buffers[MPEG_BUFFER_FORWARD].U; V_forw = buffers[MPEG_BUFFER_FORWARD].V; break; case B_FRAME: // B-frames predict back to the last I or P and forward to the next P mpeg_set_destination_buffer(MPEG_BUFFER_BIDIRECTIONAL); // _RPT3(0,"Processing B-frame (#%d) (f: %ld b: %ld)\n", temp_rf, buffers[MPEG_BUFFER_BACKWARD].frame_num, buffers[MPEG_BUFFER_FORWARD].frame_num); Y_back = buffers[MPEG_BUFFER_FORWARD].Y; U_back = buffers[MPEG_BUFFER_FORWARD].U; V_back = buffers[MPEG_BUFFER_FORWARD].V; if (reset_flag) { Y_forw = Y_back; U_forw = U_back; V_forw = V_back; } else { Y_forw = buffers[MPEG_BUFFER_BACKWARD].Y; U_forw = buffers[MPEG_BUFFER_BACKWARD].U; V_forw = buffers[MPEG_BUFFER_BACKWARD].V; } break; case D_FRAME: throw MyError("D-type frames not supported"); default: throw MyError("Unknown frame type 0x%d", frame_type); } bits.get(16); // VBV_delay if (frame_type == P_FRAME || frame_type == B_FRAME) { forw_vector_full_pel = bits.get(); forw_vector_bits = bits.get(3)-1; forw_vector_mask = (32<<forw_vector_bits)-1; forw_vector_extend = ~((16<<forw_vector_bits)-1); } if (frame_type == B_FRAME) { back_vector_full_pel = bits.get(); back_vector_bits = bits.get(3)-1; back_vector_mask = (32<<back_vector_bits)-1; back_vector_extend = ~((16<<back_vector_bits)-1); } } //////////////////////////////////////////////////// static int dct_dc_y_past, dct_dc_u_past, dct_dc_v_past; extern int 
dct_coeff[64]; #define MBF_NEW_QUANT (16) #define MBF_FORWARD (8) #define MBF_BACKWARD (4) #define MBF_PATTERN (2) #define MBF_INTRA (1) static CMemoryBitInput bits; static int macro_block_flags; YUVPixel *dstY, *dstU, *dstV; ////////////////////// extern "C" void IDCT_mmx(signed short *dct_coeff, void *dst, long pitch, int intra_flag, int pos); extern "C" void IDCT_isse(signed short *dct_coeff, void *dst, long pitch, int intra_flag, int pos); static void decode_mblock(YUVPixel *dst, long modulo, long DC_val) { #ifdef MB_STATS #ifdef MB_SPLIT_STATS #define MB_DECLARE_STAT(x) static int x##_intra=0, x##_inter=0; #define MB_STAT_INC(x) (macro_block_flags & MBF_INTRA ? (++x##_intra) : (++x##_inter)) #else #define MB_DECLARE_STAT(x) static int x=0; #define MB_STAT_INC(x) (++x) #endif MB_DECLARE_STAT(st_level1_idx0); MB_DECLARE_STAT(st_level1_idx1); MB_DECLARE_STAT(st_exit); MB_DECLARE_STAT(st_short); MB_DECLARE_STAT(st_long); MB_DECLARE_STAT(st_vshort); MB_DECLARE_STAT(st_escape); MB_DECLARE_STAT(st_first_short); MB_DECLARE_STAT(st_first_long); #else #define MB_STAT_INC(x) #endif const int *idx=zigzag; int level; int pos=0; int coeff_count = 0; const int *quant_matrix = intramatrix; long v; int sign = 0; dct_coeff[0] = DC_val; v = bits.peek(12); if (!(macro_block_flags & MBF_INTRA)) { quant_matrix = nonintramatrix; sign = 1; if (v < 0x800) { idx = zigzag-1; dct_coeff[0]=0; MB_STAT_INC(st_first_long); } else { if (v & 0x400) dct_coeff[0] = (-3*quant_matrix[0] + 128) >> 8; else dct_coeff[0] = (3*quant_matrix[0] + 128) >> 8; bits.skip(2); v = bits.peek(12); MB_STAT_INC(st_first_short); } } // macroblock statistics from nuku.mpg: // // 2883584 mblocks // 7142656 (42%) very short // 3457222 (20%) level 1, idx_run = 0 // 1604394 ( 9%) level 1, idx_run = 1 // 937613 ( 5%) long // 451518 ( 2%) short // 161300 ( 0%) escape // 1457101 (50%) first long // 705505 (24%) first short // nuku1.mpg: // 47710208 mblocks // 120471300 (43%) very short // 52059388 (18%) level 1, idx_run = 
0 // 47710208 (17%) exits // 27499741 ( 9%) level 1, idx_run = 1 // 18368812 ( 6%) long // 7911139 ( 2%) short // 4948686 ( 1%) escape // 29281756 (61%) first long // 14165596 (29%) first short for(;;v = bits.peek(12)) { int level_sign = 0; if (v >= 0x080) { // 080-FFF (90%) if (v < 0x600) { // 080-5FF very short (40%) int t = (v>>4); int bcnt; bcnt = mpeg_dct_coeff_decode0[t*4+2-32]; idx += mpeg_dct_coeff_decode0[t*4+0-32]; level = mpeg_dct_coeff_decode0[t*4+1-32]; _ASSERT(level != 0); bits.skip(bcnt); if (v & mpeg_dct_coeff_decode0[t*4+3-32]) level_sign = -1; MB_STAT_INC(st_vshort); } else if (v >= 0x0c00) { // C00-FFF level1-idx0 (20%) bits.skip(3); ++idx; level = 1; if (v & 0x200) level_sign = -1; MB_STAT_INC(st_level1_idx0); } else if (v >= 0x800) { // 800-BFF bits.skip(2); MB_STAT_INC(st_exit); break; } else { // 600-7FF bits.skip(4); idx += 2; level = 1; if (v & 0x100) level_sign = -1; MB_STAT_INC(st_level1_idx1); } } else { if (v >= 0x040) { bits.skip(6); idx += bits.get(6)+1; level = bits.get_signed(8); // _ASSERT(level != 0); if (!(level & 0x7f)) { level <<= 1; level |= bits.get(8); } if (level<0) { level = -level; level_sign = -1; } MB_STAT_INC(st_escape); } else if (v >= 0x020) { int t = (v>>2); idx += mpeg_dct_coeff_decode1[t*2+0-16]+1; level = mpeg_dct_coeff_decode1[t*2+1-16]; _ASSERT(level != 0); MB_STAT_INC(st_short); bits.skip(11); if (v & 2) level_sign = -1; } else { int t, bcnt; MB_STAT_INC(st_long); bits.skip(7); v = bits.peek(10); t = v>>1; bcnt = mpeg_dct_coeff_decode2[t*4+2]; idx += mpeg_dct_coeff_decode2[t*4+0]+1; level = mpeg_dct_coeff_decode2[t*4+1]; bits.skip(bcnt); _ASSERT(level != 0); if (v & (0x400>>bcnt)) level_sign = -1; } } ++coeff_count; #ifdef DCT_POSITION_CHECKING if (idx >= zigzag+64) { pos = 63; break; } #endif pos = *idx; // quant_matrix: 0...255 // level: -256...255 _ASSERT(level != 0); // We need to oddify coefficients down toward zero. // can't - already added DCT>FFT matrix! 
level = (((level*2+sign) * quant_matrix[pos] + 128) >> 8); // Negate coefficient if necessary. dct_coeff[pos] = (level ^ level_sign) - level_sign; } #ifdef MB_STATS #ifdef MB_SPLIT_STATS if (st_exit_intra && !(st_exit_intra & 262143)) { static char buf[256]; int total = st_exit_intra + st_short_intra + st_long_intra + st_level1_idx0_intra + st_level1_idx1_intra + st_vshort_intra + st_escape_intra; sprintf(buf, "[intra] %ld mblocks, %ld vshort (%d%%), %ld sh (%d%%), %ld ln (%d%%), %ld 1-0 (%d%%), %ld 1-1 (%d%%), %ld E (%d%%), %ld f-s (%d%%), %ld f-l (%d%%)\n" ,st_exit_intra ,st_vshort_intra ,(int)((st_vshort_intra *100i64)/total) ,st_short_intra ,(int)((st_short_intra *100i64)/total) ,st_long_intra ,(int)((st_long_intra *100i64)/total) ,st_level1_idx0_intra ,(int)((st_level1_idx0_intra *100i64)/total) ,st_level1_idx1_intra ,(int)((st_level1_idx1_intra *100i64)/total) ,st_escape_intra ,(int)((st_escape_intra *100i64)/total) ,st_first_short_intra ,(int)((st_first_short_intra *100i64)/total) ,st_first_long_intra ,(int)((st_first_long_intra *100i64)/total) ); OutputDebugString(buf); } if (st_exit_inter && !(st_exit_inter & 262143)) { static char buf[256]; int total = st_exit_inter + st_short_inter + st_long_inter + st_level1_idx0_inter + st_level1_idx1_inter + st_vshort_inter + st_escape_inter; sprintf(buf, "[inter] %ld mblocks, %ld vshort (%d%%), %ld sh (%d%%), %ld ln (%d%%), %ld 1-0 (%d%%), %ld 1-1 (%d%%), %ld E (%d%%), %ld f-s (%d%%), %ld f-l (%d%%)\n" ,st_exit_inter ,st_vshort_inter ,(int)((st_vshort_inter *100i64)/total) ,st_short_inter ,(int)((st_short_inter *100i64)/total) ,st_long_inter ,(int)((st_long_inter *100i64)/total) ,st_level1_idx0_inter ,(int)((st_level1_idx0_inter *100i64)/total) ,st_level1_idx1_inter ,(int)((st_level1_idx1_inter *100i64)/total) ,st_escape_inter ,(int)((st_escape_inter *100i64)/total) ,st_first_short_inter ,(int)((st_first_short_inter *100i64)/total) ,st_first_long_inter ,(int)((st_first_long_inter *100i64)/total) ); 
OutputDebugString(buf); } #else if (!(st_exit & 262143)) { static char buf[256]; int total = st_exit + st_short + st_long + st_level1_idx0 + st_level1_idx1 + st_vshort + st_escape; sprintf(buf, "%ld mblocks, %ld vshort (%d%%), %ld sh (%d%%), %ld ln (%d%%), %ld 1-0 (%d%%), %ld 1-1 (%d%%), %ld E (%d%%), %ld f-s (%d%%), %ld f-l (%d%%)\n" ,st_exit ,st_vshort,(int)((st_vshort*100i64)/total) ,st_short,(int)((st_short*100i64)/total) ,st_long,(int)((st_long*100i64)/total) ,st_level1_idx0,(int)((st_level1_idx0*100i64)/total) ,st_level1_idx1,(int)((st_level1_idx1*100i64)/total) ,st_escape,(int)((st_escape*100i64)/total) ,st_first_short, (int)((st_first_short*100i64)/total) ,st_first_long, (int)((st_first_long*100i64)/total) ); OutputDebugString(buf); } #endif #endif dct_coeff[0] += ROUNDVAL; if (!pos || coeff_count<=1) { if (macro_block_flags & MBF_INTRA) IDCT_fast_put(pos, dst, modulo); else IDCT_fast_add(pos, dst, modulo); } else { IDCT(dst, modulo, macro_block_flags & MBF_INTRA); } } static void decode_mblock_MMX(YUVPixel *dst, long modulo, long DC_val) { const int *idx=zigzag_MMX; int level; int pos=0; const int *quant_matrix = intramatrix; unsigned long v; int sign = 0; signed short coeff0[67]; signed short *const coeff = (signed short *)(((long)coeff0 + 7) & 0xfffffff8); memset(coeff, 0, 64*sizeof(signed short)); coeff[0] = (DC_val + 128) >> 8; if (!(macro_block_flags & MBF_INTRA)) { quant_matrix = nonintramatrix; sign = 1; v = bits.peek(); if (v < 0x80000000) { idx = zigzag_MMX-1; coeff[0]=0; MB_STAT_INC(st_first_long); } else { coeff[0] = (((3*quant_matrix[0] + 8) >> 4) - 1) | 1; if (v & 0x40000000) coeff[0] = -coeff[0]; bits.skip(2); MB_STAT_INC(st_first_short); } } // macroblock statistics from nuku.mpg: // // 2883584 mblocks // 7142656 (42%) very short // 3457222 (20%) level 1, idx_run = 0 // 1604394 ( 9%) level 1, idx_run = 1 // 937613 ( 5%) long // 451518 ( 2%) short // 161300 ( 0%) escape // 1457101 (50%) first long // 705505 (24%) first short // nuku1.mpg: // 
47710208 mblocks // 120471300 (43%) very short // 52059388 (18%) level 1, idx_run = 0 // 47710208 (17%) exits // 27499741 ( 9%) level 1, idx_run = 1 // 18368812 ( 6%) long // 7911139 ( 2%) short // 4948686 ( 1%) escape // 29281756 (61%) first long // 14165596 (29%) first short for(;;) { int level_sign = 0; v = bits.peek(); if (PREDICT(080, v >= 0x08000000)) { // 080-FFF (90%) if (PREDICT(600, v < 0x60000000)) { // 080-5FF very short (40%) int t = (v>>24); int bcnt; bcnt = mpeg_dct_coeff_decode0[t*4+2-32]; idx += mpeg_dct_coeff_decode0[t*4+0-32]; level = mpeg_dct_coeff_decode0[t*4+1-32]; _ASSERT(level != 0); bits.skip8(bcnt); // if (v & mpeg_dct_coeff_decode0[t*4+3-32]) // level_sign = -1; level_sign = (((signed long)(v>>20) & mpeg_dct_coeff_decode0[t*4+3-32])+0x7FFFFFFF) >> 31; MB_STAT_INC(st_vshort); } else if (PREDICT(C00, v >= 0xc0000000)) { // C00-FFF level1-idx0 (20%) bits.skipconst(3); ++idx; level = 1; // if (v & 0x200) // level_sign = -1; level_sign = (((signed long)v&0x20000000)+0x7FFFFFFF) >> 31; MB_STAT_INC(st_level1_idx0); } else if (PREDICT(800, v >= 0x80000000)) { // 800-BFF bits.skipconst(2); MB_STAT_INC(st_exit); break; } else { // 600-7FF bits.skipconst(4); idx += 2; level = 1; // if (v & 0x100) // level_sign = -1; level_sign = (((signed long)v&0x10000000)+0x7FFFFFFF) >> 31; MB_STAT_INC(st_level1_idx1); } } else { if (PREDICT(040, v >= 0x04000000)) { bits.skipconst(6); idx += bits.getconst(6)+1; level = bits.get_signed_const(8); // _ASSERT(level != 0); if (!(level & 0x7f)) { level <<= 1; level |= bits.getconst(8); } if (level<0) { level = -level; level_sign = -1; } MB_STAT_INC(st_escape); } else if (PREDICT(020, v >= 0x02000000)) { int t = (v>>22); idx += mpeg_dct_coeff_decode1[t*2+0-16]+1; level = mpeg_dct_coeff_decode1[t*2+1-16]; _ASSERT(level != 0); MB_STAT_INC(st_short); bits.skipconst(11); // if (v & 2) // level_sign = -1; level_sign = -((signed long)v&0x00200000)>>31; } else { int t, bcnt; MB_STAT_INC(st_long); bits.skipconst(7); v = 
bits.peek(10); t = v>>1; bcnt = mpeg_dct_coeff_decode2[t*4+2]; idx += mpeg_dct_coeff_decode2[t*4+0]+1; level = mpeg_dct_coeff_decode2[t*4+1]; bits.skip(bcnt); _ASSERT(level != 0); // if (v & (0x400>>bcnt)) // level_sign = -1; level_sign = -((signed long)v&(0x400>>bcnt))>>31; } } #ifdef DCT_POSITION_CHECKING if (idx >= zigzag_MMX+64) { pos = 63; break; } #endif pos = *idx; // quant_matrix: 0...255 // level: -256...255 _ASSERT(level != 0); // We need to oddify coefficients down toward zero. level = ((((level*2+sign) * quant_matrix[pos] + 8) >> 4) - 1) | 1; // Negate coefficient if necessary. coeff[pos] = (level ^ level_sign) - level_sign; } // ++lpos_stats[pos]; #ifdef DISPLAY_INTER_COUNT if (macro_block_flags & MBF_INTRA) { dct_coeff[0] = (32<<8)+ROUNDVAL; IDCT_fast_put(pos, dst, modulo); } else { dct_coeff[0] = (128<<8)+ROUNDVAL; IDCT_fast_add(pos, dst, modulo); } #else if (!pos) { dct_coeff[0] = (coeff[0]<<8)+ROUNDVAL; if (macro_block_flags & MBF_INTRA) IDCT_fast_put(pos, dst, modulo); else IDCT_fast_add(pos, dst, modulo); } else (ISSE_enabled ? IDCT_isse : IDCT_mmx)(coeff, dst, modulo, !!(macro_block_flags & MBF_INTRA), pos); #endif } static void decode_mblock_Y(YUVPixel *dst) { // memset(dct_coeff, 0, sizeof dct_coeff); #ifdef DEBUG for(int i=0; i<dct_coeff; i++) _ASSERT(dct_coeff[i] == 0); #endif if ((macro_block_flags & MBF_INTRA)) { int size, value = 0; { long v=bits.peek(7)*2; if (v < 64*2) { bits.skip8(2); size = (v>>6)+1; } else { size = mpeg_dct_size_luminance_decode[v - 64*2]; bits.skip(mpeg_dct_size_luminance_decode[v+1 - 64*2]); } } if (size) { int halfval; value = bits.get(size); halfval = 1 << (size-1); if (value < halfval) value = (value+1) - 2*halfval; value <<= 11; } (MMX_enabled ? decode_mblock_MMX : decode_mblock)(dst, y_pitch, dct_dc_y_past += value); } else (MMX_enabled ? 
decode_mblock_MMX : decode_mblock)(dst, y_pitch, 0); } static void decode_mblock_UV(YUVPixel *dst, int& dc_ref) { // memset(dct_coeff, 0, sizeof dct_coeff); #ifdef DEBUG for(int i=0; i<dct_coeff; i++) _ASSERT(dct_coeff[i] == 0); #endif if ((macro_block_flags & MBF_INTRA)) { int size, value=0; { long v=bits.peek(8)*2; if (v < 192*2) { size = v>>7; bits.skip8(2); } else { size = mpeg_dct_size_chrominance_decode[v - 192*2]; bits.skip(mpeg_dct_size_chrominance_decode[v+1 - 192*2]); } } if (size) { int halfval; value = bits.get(size); halfval = 1 << (size-1); if (value < halfval) value = (value+1) - 2*halfval; value <<= 11; } (MMX_enabled ? decode_mblock_MMX : decode_mblock)(dst, uv_pitch, dc_ref += value); } else (MMX_enabled ? decode_mblock_MMX : decode_mblock)(dst, uv_pitch, 0); } /////////////////////////// static void video_process_picture_slice_I(char *ptr, int type) { long pos_x, pos_y; pos_y = type-1; pos_x = -1; dstY = Y_dest + 16 * y_pitch * pos_y - 16; dstU = U_dest + 8 * uv_pitch * pos_y - 8; dstV = V_dest + 8 * uv_pitch * pos_y - 8; do { int inc = 0; int i; // 00000001111 (00F) -> padding // 00000001000 (008) -> skip 33 more // 1 -> skip 1 i = bits.peek(11); while(i == 0xf) { bits.skip(11); i = bits.peek(11); } while(i == 0x8) { bits.skip(11); i = bits.peek(11); inc += 33; dct_dc_y_past = dct_dc_u_past = dct_dc_v_past = MIDVAL; } if (i&0x400) { bits.skip(); ++inc; } else if (i>=96) { i>>=4; bits.skip(mpeg_macro_block_inc_decode2[i*2+1-12]); inc += mpeg_macro_block_inc_decode2[i*2+0-12]; } else { bits.skip(mpeg_macro_block_inc_decode[i*2+1]); inc += mpeg_macro_block_inc_decode[i*2+0]; } pos_x += inc; while(pos_x >= mbWidth) { pos_x-=mbWidth; pos_y++; if (pos_y >= mbHeight) return; dstY += y_modulo; dstU += uv_modulo; dstV += uv_modulo; } dstY += 16*inc; dstU += 8*inc; dstV += 8*inc; _ASSERT(dstY >= Y_dest); _ASSERT(dstY <= Y_dest + y_pitch * 16 * (mbHeight-1) + 16*(mbWidth-1)); macro_block_flags = MBF_INTRA; if (!bits.get_flag()) { // macro_block_flags |= 
MBF_NEW_QUANT; bits.skip(); int quant_scale = bits.get8(5); intramatrix = intramatrices[quant_scale]; nonintramatrix = nonintramatrices[quant_scale]; } decode_mblock_Y(dstY); decode_mblock_Y(dstY + 8); decode_mblock_Y(dstY + y_pitch*8); decode_mblock_Y(dstY + y_pitch*8 + 8); decode_mblock_UV(dstU, dct_dc_u_past); decode_mblock_UV(dstV, dct_dc_v_past); } while(!bits.next(23,0)); } static int __inline mpeg_get_motion_component() { long v; v = bits.peek(11); if (v & 0x400) { bits.skip(); return 0; } else if (v >= 96) { v = (v-96)>>4; bits.skip(mpeg_motion_code_decode2[v*2+1]); return mpeg_motion_code_decode2[v*2+0]; } else { bits.skip(mpeg_motion_code_decode[v*2+1]); return mpeg_motion_code_decode[v*2+0]; } } static void video_process_picture_slice_P(char *ptr, int type) { BOOL is_first_block = TRUE; long pos_x, pos_y; pos_y = type-1; pos_x = -1; dstY = Y_dest + 16 * y_pitch * pos_y - 16; dstU = U_dest + 8 * uv_pitch * pos_y - 8; dstV = V_dest + 8 * uv_pitch * pos_y - 8; do { int inc = 0; signed char cbp = 0x3f; int i; // 00000001111 (00F) -> padding // 00000001000 (008) -> skip 33 more // 1 -> skip 1 i = bits.peek(11); while(i == 0xf) { bits.skip(11); i = bits.peek(11); } while(i == 0x8) { bits.skip(11); i = bits.peek(11); inc += 33; } if (i&0x400) { bits.skip(); ++inc; } else if (i>=96) { i >>= 4; bits.skip(mpeg_macro_block_inc_decode2[i*2+1-12]); inc += mpeg_macro_block_inc_decode2[i*2+0-12]; } else { bits.skip(mpeg_macro_block_inc_decode[i*2+1]); inc += mpeg_macro_block_inc_decode[i*2+0]; } // _RPT3(0,"(%d,%d): inc %d\n", (pos_x+1), pos_y + (pos_x==mbWidth-1?1:0), inc); // _RPT1(0,"skip: %ld\n", inc); if (inc > 1) { dct_dc_y_past = dct_dc_u_past = dct_dc_v_past = MIDVAL; forw_vector_x = forw_vector_y = 0; if (pos_x >= 0) for(i=1; i<inc; i++) { ++pos_x; dstY += 16; dstU += 8; dstV += 8; if(pos_x >= mbWidth) { pos_x-=mbWidth; pos_y++; if (pos_y >= mbHeight) return; dstY += y_modulo; dstU += uv_modulo; dstV += uv_modulo; } video_copy_forward(pos_x, pos_y); } else { 
// NOTE(review): this continues video_process_picture_slice_P inside the
// "inc > 1" branch; this else-arm handles a skip at the very start of the
// slice (pos_x still negative), which only advances the position.
                pos_x += inc-1;
                dstY += 16*(inc-1);
                dstU += 8*(inc-1);
                dstV += 8*(inc-1);
            }
//          } else pos_x += inc-1;
        }

        // Step onto the coded macroblock, wrapping to the next row if needed.
        ++pos_x;

        while (pos_x >= mbWidth) {
            pos_x-=mbWidth;
            pos_y++;
            if (pos_y >= mbHeight)
                return;

            dstY += y_modulo;
            dstU += uv_modulo;
            dstV += uv_modulo;
        }

        dstY += 16;
        dstU += 8;
        dstV += 8;

        _ASSERT(dstY >= Y_dest);
        _ASSERT(dstY <= Y_dest + y_pitch * 16 * (mbHeight-1) + 16*(mbWidth-1));

        // Macroblock type: a leading '1' is the 1-bit code for
        // forward+pattern; anything else is looked up in a table of
        // (flags, code length) pairs.
        {
            long v=bits.peek(6);

            if (v>=32) {
                macro_block_flags = MBF_FORWARD | MBF_PATTERN;
                bits.skip();
            } else {
                macro_block_flags = mpeg_p_type_mb_type_decode[v*2];
                bits.skip(mpeg_p_type_mb_type_decode[v*2+1]);
            }
        }

        // Non-intra blocks reset the DC predictors; their coded block
        // pattern starts empty (intra keeps the default 0x3f = all six).
        if (!(macro_block_flags & MBF_INTRA)) {
            dct_dc_y_past = dct_dc_u_past = dct_dc_v_past = MIDVAL;
            cbp = 0;
        }

        if (macro_block_flags & MBF_NEW_QUANT) {
            int quant_scale = bits.get8(5);

            intramatrix = intramatrices[quant_scale];
            nonintramatrix = nonintramatrices[quant_scale];
        }

        if (macro_block_flags & MBF_FORWARD) {
            // motion vector for forward prediction exists
            int motion_x_forw_c, motion_y_forw_c;
            int motion_x_forw_r, motion_y_forw_r;
            int delta;

            motion_x_forw_c = mpeg_get_motion_component();

            // according to this information the motion vector must be decoded

            // With no residual bits, or a zero code, the code is the delta;
            // otherwise the code is combined with forw_vector_bits residual
            // bits, preserving the sign of the code.
            if ((signed char)forw_vector_bits<=0 || motion_x_forw_c==0)
                delta = motion_x_forw_c;
            else {
                motion_x_forw_r = bits.get(forw_vector_bits)+1;

                if (motion_x_forw_c<0)
                    delta = -(((-motion_x_forw_c-1)<<forw_vector_bits) + motion_x_forw_r);
                else
                    delta = ((motion_x_forw_c-1)<<forw_vector_bits) + motion_x_forw_r;
            }

            // NOTE(review): mask/add/xor presumably wraps and sign-extends
            // the vector to its legal range; forw_vector_mask and
            // forw_vector_extend are set elsewhere -- confirm.
            forw_vector_x = (((forw_vector_x + delta) & forw_vector_mask) + forw_vector_extend) ^ forw_vector_extend;

            motion_y_forw_c = mpeg_get_motion_component();

            if ((signed char)forw_vector_bits<=0 || motion_y_forw_c==0)
                delta = motion_y_forw_c;
            else {
                motion_y_forw_r = bits.get(forw_vector_bits)+1;

                if (motion_y_forw_c<0)
                    delta = -(((-motion_y_forw_c-1)<<forw_vector_bits) + motion_y_forw_r);
                else
                    delta = ((motion_y_forw_c-1)<<forw_vector_bits) + motion_y_forw_r;
            }

            forw_vector_y = (((forw_vector_y + delta) & forw_vector_mask) + forw_vector_extend) ^ forw_vector_extend;

            // grab the referred area into "pel1"
//_RPT4(0,"(%d,%d) copy motion vector (%+d,%+d)\n", pos_x, pos_y, forw_vector_x, forw_vector_y);
//          video_copy_forward_prediction(dstY, dstU, dstV, pos_x, pos_y);
        } else {
            // (only) in P_TYPE the motion vector is to be reset.
            forw_vector_x = forw_vector_y = 0;
//_RPT2(0,"(%d,%d) reset motion vector\n", pos_x, pos_y);
            video_copy_forward(pos_x, pos_y);
        }

        // Coded block pattern: two tables of (pattern, code length) pairs,
        // selected by the magnitude of the 9-bit peek.
        if ((macro_block_flags & MBF_PATTERN)) {
#if 0
            long v = bits.peek(9)*2;

            cbp = mpeg_block_pattern_decode[v+0];
            bits.skip(mpeg_block_pattern_decode[v+1]);
#else
            long v = bits.peek(9);

            if (v >= 128) {
                v>>=4;
                cbp = mpeg_block_pattern_decode0[v*2+0-16];
                bits.skip(mpeg_block_pattern_decode0[v*2+1-16]);
            } else {
                cbp = mpeg_block_pattern_decode1[v*2+0];
                bits.skip(mpeg_block_pattern_decode1[v*2+1]);
            }
#endif
        }

        // Luminance prediction is fetched first, then the four Y blocks are
        // decoded; chrominance prediction and blocks follow.
        if (macro_block_flags & MBF_FORWARD)
            video_copy_forward_prediction(pos_x, pos_y, false);

        if (cbp & 0x20) decode_mblock_Y(dstY);
        if (cbp & 0x10) decode_mblock_Y(dstY + 8);
        if (cbp & 0x08) decode_mblock_Y(dstY + y_pitch*8);
        if (cbp & 0x04) decode_mblock_Y(dstY + y_pitch*8 + 8);

        if (macro_block_flags & MBF_FORWARD)
            video_copy_forward_prediction(pos_x, pos_y, true);

        if (cbp & 0x02) decode_mblock_UV(dstU, dct_dc_u_past);
        if (cbp & 0x01) decode_mblock_UV(dstV, dct_dc_v_past);

        if (!(macro_block_flags & MBF_INTRA))
            dct_dc_y_past = dct_dc_u_past = dct_dc_v_past = MIDVAL;

        // NOTE(review): is_first_block is written here but the skip test in
        // this function uses pos_x >= 0 instead of reading it.
        is_first_block=FALSE;

#ifdef STATISTICS
        if (macro_block_flags & MBF_PATTERN)
            stats.coded_block_pattern++;
#endif
    } while(!bits.next(23,0));
}

// Decodes the macroblocks of one slice of a B picture.
//
// ptr  - not read here; the bit reader `bits` is used instead.
// type - slice start code value; type-1 is used as the starting macroblock row.
//
// Skipped macroblocks after the first coded one are reconstructed with the
// prediction mode (forward/backward flags) and vectors left over from the
// previous macroblock.
static void video_process_picture_slice_B(char *ptr, int type) {
    BOOL is_first_block = TRUE;
    long pos_x, pos_y;

    pos_y = type-1;
    pos_x = -1;

    dstY = Y_dest + 16 * y_pitch * pos_y - 16;
    dstU = U_dest +  8 * uv_pitch * pos_y - 8;
    dstV = V_dest +  8 * uv_pitch * pos_y - 8;

    do {
        int inc = 0;
        signed char cbp = 0x3f;
        int i;

        // 00000001111 (00F) -> padding
        // 00000001000 (008) -> skip 33 more
        // 1 -> skip 1

        i = bits.peek(11);

        while(i == 0xf) {
// NOTE(review): this continues video_process_picture_slice_B inside the
// loop that discards 11-bit padding codes before the address increment.
            bits.skip(11);
            i = bits.peek(11);
        }

        while(i == 0x8) {
            bits.skip(11);
            i = bits.peek(11);
            inc += 33;
        }

        if (i&0x400) {
            bits.skip();
            ++inc;
        } else if (i>=96) {
            i >>= 4;
            bits.skip8(mpeg_macro_block_inc_decode2[i*2+1-12]);
            inc += mpeg_macro_block_inc_decode2[i*2+0-12];
        } else {
            bits.skip8(mpeg_macro_block_inc_decode[i*2+1]);
            inc += mpeg_macro_block_inc_decode[i*2+0];
        }

        if (inc > 1) {
            dct_dc_y_past = dct_dc_u_past = dct_dc_v_past = MIDVAL;

            if (!is_first_block) {
                // Reconstruct each skipped macroblock using the flags and
                // vectors still held from the previous macroblock: first the
                // luminance pass (fchrom=false), then chrominance (true).
                for(i=1; i<inc; i++) {
                    ++pos_x;
                    dstY += 16;
                    dstU += 8;
                    dstV += 8;

                    if(pos_x >= mbWidth) {
                        pos_x-=mbWidth;
                        pos_y++;
                        if (pos_y >= mbHeight)
                            return;

                        dstY += y_modulo;
                        dstU += uv_modulo;
                        dstV += uv_modulo;
                    }

                    if ((macro_block_flags & MBF_FORWARD)) {
                        video_copy_forward_prediction( pos_x, pos_y, false);
                        if ((macro_block_flags & MBF_BACKWARD))
                            video_add_backward_prediction( pos_x, pos_y, false);
                    } else if ((macro_block_flags & MBF_BACKWARD)) {
                        video_copy_backward_prediction( pos_x, pos_y, false);
                    }

                    if ((macro_block_flags & MBF_FORWARD)) {
                        video_copy_forward_prediction( pos_x, pos_y, true);
                        if ((macro_block_flags & MBF_BACKWARD))
                            video_add_backward_prediction( pos_x, pos_y, true);
                    } else if ((macro_block_flags & MBF_BACKWARD)) {
                        video_copy_backward_prediction( pos_x, pos_y, true);
                    }
                }
            } else {
                // Skip at the start of the slice: only advance the position.
                pos_x += inc-1;
                dstY += 16*(inc-1);
                dstU += 8*(inc-1);
                dstV += 8*(inc-1);
            }
        }

        // Step onto the coded macroblock, wrapping to the next row if needed.
        ++pos_x;

        while (pos_x >= mbWidth) {
            pos_x -= mbWidth;
            ++pos_y;
            if (pos_y >= mbHeight)
                return;

            dstY += y_modulo;
            dstU += uv_modulo;
            dstV += uv_modulo;
        }

        dstY += 16;
        dstU += 8;
        dstV += 8;

        _ASSERT(dstY >= Y_dest);
        _ASSERT(dstY <= Y_dest + y_pitch * 16 * (mbHeight-1) + 16*(mbWidth-1));

        // Macroblock type: the two 2-bit codes starting with '1' are handled
        // inline (forward+backward, with or without pattern); the rest come
        // from a table of (flags, code length) pairs.
        {
            long v=bits.peek(6);

            if (v>=32) {
                macro_block_flags = MBF_FORWARD | MBF_BACKWARD;
                if (v>=48)
                    macro_block_flags = MBF_FORWARD | MBF_BACKWARD | MBF_PATTERN;
                bits.skip8(2);
            } else {
                macro_block_flags = mpeg_b_type_mb_type_decode[v*2];
                bits.skip8(mpeg_b_type_mb_type_decode[v*2+1]);
            }
        }

        if (!(macro_block_flags & MBF_INTRA)) {
            dct_dc_y_past = dct_dc_u_past = dct_dc_v_past = MIDVAL;
            cbp = 0;
        }

        if ((macro_block_flags & MBF_NEW_QUANT)) {
            int quant_scale = bits.get8(5);

            intramatrix = intramatrices[quant_scale];
            nonintramatrix = nonintramatrices[quant_scale];
        }

        if ((macro_block_flags & MBF_FORWARD)) {
            // motion vector for forward prediction exists
            int motion_x_forw_c, motion_y_forw_c;
            int motion_x_forw_r, motion_y_forw_r;
            int delta;

            motion_x_forw_c = mpeg_get_motion_component();

            // according to this information the motion vector must be decoded

            if ((signed char)forw_vector_bits<=0 || motion_x_forw_c==0)
                delta = motion_x_forw_c;
            else {
                motion_x_forw_r = bits.get(forw_vector_bits)+1;

                if (motion_x_forw_c<0)
                    delta = -(((-motion_x_forw_c-1)<<forw_vector_bits) + motion_x_forw_r);
                else
                    delta = ((motion_x_forw_c-1)<<forw_vector_bits) + motion_x_forw_r;
            }

            forw_vector_x = (((forw_vector_x + delta) & forw_vector_mask) + forw_vector_extend) ^ forw_vector_extend;

            motion_y_forw_c = mpeg_get_motion_component();

            if ((signed char)forw_vector_bits<=0 || motion_y_forw_c==0)
                delta = motion_y_forw_c;
            else {
                motion_y_forw_r = bits.get(forw_vector_bits)+1;

                if (motion_y_forw_c<0)
                    delta = -(((-motion_y_forw_c-1)<<forw_vector_bits) + motion_y_forw_r);
                else
                    delta = ((motion_y_forw_c-1)<<forw_vector_bits) + motion_y_forw_r;
            }

            forw_vector_y = (((forw_vector_y + delta) & forw_vector_mask) + forw_vector_extend) ^ forw_vector_extend;
        }

        // Backward vector: same decoding as the forward one, using the
        // back_vector_* state.
        if ((macro_block_flags & MBF_BACKWARD)) {
            int motion_x_back_c, motion_y_back_c;
            int motion_x_back_r, motion_y_back_r;
            int delta;

            motion_x_back_c = mpeg_get_motion_component();

            // according to this information the motion vector must be decoded

            if ((signed char)back_vector_bits<=0 || motion_x_back_c==0)
                delta = motion_x_back_c;
            else {
                motion_x_back_r = bits.get(back_vector_bits)+1;

                if (motion_x_back_c<0)
                    delta = -(((-motion_x_back_c-1)<<back_vector_bits) + motion_x_back_r);
                else
                    delta = ((motion_x_back_c-1)<<back_vector_bits) + motion_x_back_r;
            }

            back_vector_x = (((back_vector_x + delta) & back_vector_mask) + back_vector_extend) ^ back_vector_extend;

            motion_y_back_c = mpeg_get_motion_component();

            if ((signed char)back_vector_bits<=0 || motion_y_back_c==0)
                delta = motion_y_back_c;
            else {
                motion_y_back_r = bits.get(back_vector_bits)+1;

                if (motion_y_back_c<0)
                    delta = -(((-motion_y_back_c-1)<<back_vector_bits) + motion_y_back_r);
                else
                    delta = ((motion_y_back_c-1)<<back_vector_bits) + motion_y_back_r;
            }

            back_vector_y = (((back_vector_y + delta) & back_vector_mask) + back_vector_extend) ^ back_vector_extend;
        }

        if ((macro_block_flags & MBF_PATTERN)) {
            long v = bits.peek(9);

            if (v >= 128) {
                v>>=4;
                cbp = mpeg_block_pattern_decode0[v*2+0-16];
                bits.skip8(mpeg_block_pattern_decode0[v*2+1-16]);
            } else {
                cbp = mpeg_block_pattern_decode1[v*2+0];
                bits.skip(mpeg_block_pattern_decode1[v*2+1]);
            }
        }

        // Luminance prediction: forward copy, optionally combined with the
        // backward prediction; or backward copy alone.
        if (macro_block_flags & MBF_FORWARD) {
            video_copy_forward_prediction(pos_x, pos_y, false);
            if (macro_block_flags & MBF_BACKWARD)
                video_add_backward_prediction(pos_x, pos_y, false);
        } else if (macro_block_flags & MBF_BACKWARD)
            video_copy_backward_prediction(pos_x, pos_y, false);

        if (cbp & 0x20) decode_mblock_Y(dstY);
        if (cbp & 0x10) decode_mblock_Y(dstY + 8);
        if (cbp & 0x08) decode_mblock_Y(dstY + y_pitch*8);
        if (cbp & 0x04) decode_mblock_Y(dstY + y_pitch*8 + 8);

        // Same again for chrominance.
        if (macro_block_flags & MBF_FORWARD) {
            video_copy_forward_prediction(pos_x, pos_y, true);
            if (macro_block_flags & MBF_BACKWARD)
                video_add_backward_prediction(pos_x, pos_y, true);
        } else if (macro_block_flags & MBF_BACKWARD)
            video_copy_backward_prediction(pos_x, pos_y, true);

        if (cbp & 0x02) decode_mblock_UV(dstU, dct_dc_u_past);
        if (cbp & 0x01) decode_mblock_UV(dstV, dct_dc_v_past);

        // Intra macroblocks reset both motion vectors; all others reset the
        // DC predictors.
        if (macro_block_flags & MBF_INTRA)
            forw_vector_x = forw_vector_y = back_vector_x = back_vector_y = 0;
        else
            dct_dc_y_past = dct_dc_u_past = dct_dc_v_past = MIDVAL;

        is_first_block=FALSE;

#ifdef STATISTICS
        if (macro_block_flags & MBF_PATTERN)
            stats.coded_block_pattern++;
#endif
    } while(!bits.next(23,0));
}

// Decodes one slice: resets the per-slice decoder state, reads the slice
// header from ptr, and dispatches on the global frame_type.
//
// ptr  - start of the slice data handed to the bit reader.
// type - slice start code value; slices beyond the last macroblock row are
//        ignored.
static void video_process_picture_slice(char *ptr, int type) {
    int
quant_scale; bits = CMemoryBitInput(ptr); dct_dc_y_past = dct_dc_u_past = dct_dc_v_past = MIDVAL; forw_vector_x = forw_vector_y = 0; back_vector_x = back_vector_y = 0; quant_scale = bits.get(5); intramatrix = intramatrices[quant_scale]; nonintramatrix = nonintramatrices[quant_scale]; memset(dct_coeff, 0, sizeof dct_coeff); while(bits.get()) bits.skip(8); if (type > mbHeight) return; switch(frame_type) { case I_FRAME: video_process_picture_slice_I(ptr, type); break; case P_FRAME: video_process_picture_slice_P(ptr, type); break; case B_FRAME: video_process_picture_slice_B(ptr, type); break; } } ////////////////////////////////////////////////////////////// extern "C" void asm_YUVtoRGB32_row( unsigned long *ARGB1_pointer, unsigned long *ARGB2_pointer, YUVPixel *Y1_pointer, YUVPixel *Y2_pointer, YUVPixel *U_pointer, YUVPixel *V_pointer, long width ); extern "C" void asm_YUVtoRGB24_row( unsigned long *ARGB1_pointer, unsigned long *ARGB2_pointer, YUVPixel *Y1_pointer, YUVPixel *Y2_pointer, YUVPixel *U_pointer, YUVPixel *V_pointer, long width ); extern "C" void asm_YUVtoRGB16_row( unsigned long *ARGB1_pointer, unsigned long *ARGB2_pointer, YUVPixel *Y1_pointer, YUVPixel *Y2_pointer, YUVPixel *U_pointer, YUVPixel *V_pointer, long width ); static void YUVToRGB32(YUVPixel *Y_ptr, YUVPixel *U_ptr, YUVPixel *V_ptr, unsigned char *dst, long bpr, long w, long h) { dst = dst + bpr * (2*h - 2); do { asm_YUVtoRGB32_row( (unsigned long *)(dst + bpr), (unsigned long *)dst, Y_ptr, Y_ptr + y_pitch, U_ptr, V_ptr, w); dst = dst - 2*bpr; Y_ptr = Y_ptr + 2*y_pitch; U_ptr = U_ptr + uv_pitch; V_ptr = V_ptr + uv_pitch; } while(--h); if (MMX_enabled) __asm emms if (ISSE_enabled) __asm sfence } static void YUVToRGB24(YUVPixel *Y_ptr, YUVPixel *U_ptr, YUVPixel *V_ptr, unsigned char *dst, long bpr, long w, long h) { dst = dst + bpr * (2*h - 2); do { asm_YUVtoRGB24_row( (unsigned long *)(dst + bpr), (unsigned long *)dst, Y_ptr, Y_ptr + y_pitch, U_ptr, V_ptr, w); dst = dst - 2*bpr; Y_ptr = Y_ptr + 
// NOTE(review): the first statements below finish the row loop of YUVToRGB24.
                2*y_pitch;
        U_ptr = U_ptr + uv_pitch;
        V_ptr = V_ptr + uv_pitch;
    } while(--h);

    if (MMX_enabled)
        __asm emms

    if (ISSE_enabled)
        __asm sfence
}

// Converts the planar image to 16-bit RGB through asm_YUVtoRGB16_row.
//
// Output starts at row 2*h-2 of dst and moves upward two rows per pass; h
// counts row pairs and must be non-zero.  bpr is the destination row stride.
static void YUVToRGB16(YUVPixel *Y_ptr, YUVPixel *U_ptr, YUVPixel *V_ptr, unsigned char *dst, long bpr, long w, long h) {
    dst = dst + bpr * (2*h - 2);

    do {
        asm_YUVtoRGB16_row(
                (unsigned long *)(dst + bpr),
                (unsigned long *)dst,
                Y_ptr,
                Y_ptr + y_pitch,
                U_ptr,
                V_ptr,
                w);

        dst = dst - 2*bpr;
        Y_ptr = Y_ptr + 2*y_pitch;
        U_ptr = U_ptr + uv_pitch;
        V_ptr = V_ptr + uv_pitch;
    } while(--h);

    if (MMX_enabled)
        __asm emms

    if (ISSE_enabled)
        __asm sfence
}

// MMX interleaver producing U,Y,V,Y byte order.
//
// Naked function: arguments are read directly from the stack after the four
// register pushes (hence the +16 in every offset).  Each inner iteration
// consumes 8 U, 8 V and 16 Y bytes and stores 32 output bytes.  h counts
// output rows; the U/V pointers advance only when the remaining-row counter
// is odd.  After each row the source pointers advance by the width (2x for
// Y), while the destination advances by bpr.  The w argument slot on the
// stack is overwritten with the negated width.
static void __declspec(naked) YUVtoUYVY16_MMX(
        YUVPixel *Y_ptr,        // [esp+4+16]
        YUVPixel *U_ptr,        // [esp+8+16]
        YUVPixel *V_ptr,        // [esp+12+16]
        unsigned char *dst,     // [esp+16+16]
        long bpr,               // [esp+20+16]
        long w,                 // [esp+24+16]
        long h) {               // [esp+28+16]

    __asm {
        push    ebp
        push    edi
        push    esi
        push    ebx

        mov     edx,[esp+24+16]         ;load width (mult of 8)

        mov     ebx,[esp+8+16]          ;load source U ptr
        mov     ecx,[esp+12+16]         ;load source V ptr
        mov     eax,[esp+4+16]          ;load source Y ptr
        mov     edi,[esp+16+16]         ;load destination ptr
        mov     esi,[esp+20+16]         ;load destination pitch
        mov     ebp,[esp+28+16]         ;load height

        lea     ebx,[ebx+edx]           ;bias pointers
        lea     ecx,[ecx+edx]           ;(we count from -n to 0)
        lea     eax,[eax+edx*2]
        lea     edi,[edi+edx*4]

        neg     edx
        mov     [esp+24+16],edx
xyloop:
        movq    mm0,[ebx+edx]           ;U0-U7

        movq    mm7,[ecx+edx]           ;V0-V7
        movq    mm2,mm0                 ;U0-U7

        movq    mm4,[eax+edx*2]
        punpcklbw mm0,mm7               ;[V3][U3][V2][U2][V1][U1][V0][U0]

        movq    mm5,[eax+edx*2+8]
        punpckhbw mm2,mm7               ;[V7][U7][V6][U6][V5][U5][V4][U4]

        movq    mm1,mm0
        punpcklbw mm0,mm4               ;[Y3][V1][Y2][U1][Y1][V0][Y0][U0]

        punpckhbw mm1,mm4               ;[Y7][V3][Y6][U3][Y5][V2][Y4][U2]
        movq    mm3,mm2

        movq    [edi+edx*4+ 0],mm0
        punpcklbw mm2,mm5               ;[YB][V5][YA][U5][Y9][V4][Y8][U4]

        movq    [edi+edx*4+ 8],mm1
        punpckhbw mm3,mm5               ;[YF][V7][YE][U7][YD][V6][YC][U6]

        movq    [edi+edx*4+16],mm2

        movq    [edi+edx*4+24],mm3

        add     edx,8
        jnc     xyloop

        mov     edx,[esp+24+16]         ;reload width counter

        test    ebp,1                   ;update U/V row every other row only
        jz      oddline

        sub     ebx,edx                 ;advance U pointer
        sub     ecx,edx                 ;advance V pointer

oddline:
        sub     eax,edx                 ;advance Y pointer
        sub     eax,edx                 ;advance Y pointer

        add     edi,esi                 ;advance dest ptr

        dec     ebp
        jne     xyloop

        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        emms
        ret
    }
}

// Interleaves the planar image into U,Y,V,Y byte order.
//
// With MMX enabled the work is delegated to YUVtoUYVY16_MMX with a row count
// of h*2.  The fallback below writes two output rows per outer pass, reusing
// the same chrominance for both.  It never reads bpr and advances every
// pointer only by post-increment, so output and luminance rows are taken as
// contiguous; the chrominance pointers are rewound by uv_pitch between the
// two rows.  w is consumed in units of 8 (w/8 inner passes of 32 output
// bytes); w/8 and h must be non-zero because the loops test after running.
static void YUVToUYVY16(YUVPixel *Y_ptr, YUVPixel *U_ptr, YUVPixel *V_ptr, unsigned char *dst, long bpr, long w, long h) {
    if (MMX_enabled) {
        YUVtoUYVY16_MMX(Y_ptr, U_ptr, V_ptr, dst, bpr, w, h*2);
        return;
    }

    do {
        int wt;

        // First output row of the pair.
        wt = w/8;
        do {
            // NOTE(review): z and z2 are never used; presumably the reads
            // touch the first and last byte of the 32-byte span about to be
            // written -- confirm intent.
            char z = *dst;
            char z2 = dst[31];

            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
        } while(--wt);

        // Rewind chrominance so the second row reuses it.
        U_ptr = U_ptr - uv_pitch;
        V_ptr = V_ptr - uv_pitch;

        // Second output row of the pair.
        wt = w/8;
        do {
            char z = *dst;
            char z2 = dst[31];

            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
        } while(--wt);

    } while(--h);
}

// MMX interleaver producing Y,U,Y,V byte order.
//
// Same structure and calling convention as YUVtoUYVY16_MMX; only the unpack
// order differs so that luminance lands in the even bytes.
static void __declspec(naked) YUVtoYUY2_MMX(
        YUVPixel *Y_ptr,        // [esp+4+16]
        YUVPixel *U_ptr,        // [esp+8+16]
        YUVPixel *V_ptr,        // [esp+12+16]
        unsigned char *dst,     // [esp+16+16]
        long bpr,               // [esp+20+16]
        long w,                 // [esp+24+16]
        long h) {               // [esp+28+16]

    __asm {
        push    ebp
        push    edi
        push    esi
        push    ebx

        mov     edx,[esp+24+16]         ;load width (the value is not scaled here)

        mov     ebx,[esp+8+16]          ;load source U ptr
        mov     ecx,[esp+12+16]         ;load source V ptr
        mov     eax,[esp+4+16]          ;load source Y ptr
        mov     edi,[esp+16+16]         ;load destination ptr
        mov     esi,[esp+20+16]         ;load destination pitch
        mov     ebp,[esp+28+16]         ;load height

        lea     ebx,[ebx+edx]           ;bias pointers
        lea     ecx,[ecx+edx]           ;(we count from -n to 0)
        lea     eax,[eax+edx*2]
        lea     edi,[edi+edx*4]

        neg     edx
        mov     [esp+24+16],edx
xyloop:
        movq    mm0,[ebx+edx]           ;U0-U7

        movq    mm7,[ecx+edx]           ;V0-V7
        movq    mm1,mm0                 ;U0-U7

        movq    mm2,[eax+edx*2]         ;Y0-Y7
        punpcklbw mm0,mm7               ;[V3][U3][V2][U2][V1][U1][V0][U0]

        movq    mm4,[eax+edx*2+8]       ;Y8-YF
        punpckhbw mm1,mm7               ;[V7][U7][V6][U6][V5][U5][V4][U4]

        movq    mm3,mm2
        punpcklbw mm2,mm0               ;[V1][Y3][U1][Y2][V0][Y1][U0][Y0]

        movq    mm5,mm4
        punpckhbw mm3,mm0               ;[V3][Y7][U3][Y6][V2][Y5][U2][Y4]

        movq    [edi+edx*4+ 0],mm2
        punpcklbw mm4,mm1               ;[V5][YB][U5][YA][V4][Y9][U4][Y8]

        movq    [edi+edx*4+ 8],mm3
        punpckhbw mm5,mm1               ;[V7][YF][U7][YE][V6][YD][U6][YC]

        movq    [edi+edx*4+16],mm4
        movq    [edi+edx*4+24],mm5

        add     edx,8
        jnc     xyloop

        mov     edx,[esp+24+16]         ;reload width counter

        test    ebp,1                   ;update U/V row every other row only
        jz      oddline

        sub     ebx,edx                 ;advance U pointer
        sub     ecx,edx                 ;advance V pointer

oddline:
        sub     eax,edx                 ;advance Y pointer
        sub     eax,edx                 ;advance Y pointer

        add     edi,esi                 ;advance dest ptr

        dec     ebp
        jne     xyloop

        pop     ebx
        pop     esi
        pop     edi
        pop     ebp
        emms
        ret
    }
}

// Interleaves the planar image into Y,U,Y,V byte order.
//
// With MMX enabled the work is delegated to YUVtoYUY2_MMX with a row count of
// h*2.  The fallback mirrors YUVToUYVY16: two contiguous output rows per
// outer pass sharing one chrominance row, bpr unused, w consumed in units
// of 8.
static void YUVToYUY216(YUVPixel *Y_ptr, YUVPixel *U_ptr, YUVPixel *V_ptr, unsigned char *dst, long bpr, long w, long h) {
    if (MMX_enabled) {
        YUVtoYUY2_MMX(Y_ptr, U_ptr, V_ptr, dst, bpr, w, h*2);
        return;
    }

    do {
        int wt;

        // First output row of the pair.
        wt = w/8;
        do {
            // NOTE(review): z is never used -- see YUVToUYVY16.
            char z = *dst;

            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
        } while(--wt);

        // Rewind chrominance so the second row reuses it.
        U_ptr = U_ptr - uv_pitch;
        V_ptr = V_ptr - uv_pitch;

        // Second output row of the pair.
        wt = w/8;
        do {
            char z = *dst;

            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *U_ptr++;
            *dst++ = *Y_ptr++;
            *dst++ = *V_ptr++;
        } while(--wt);

    } while(--h);
}

//////////////////////////////////////////////////////////////

#pragma warning(push)
#pragma warning(disable: 4799)      // function has no EMMS instruction

// Copies the macroblock at (x_pos, y_pos) from the forward reference planes
// to the current destination pointers dstY/dstU/dstV without any motion
// offset: a 16x16 luminance area and two 8x8 chrominance areas.
//
// The MMX path moves 8 bytes per instruction, two rows per pass, and leaves
// EMMS to the caller (hence the warning suppression above); the other path
// moves 4 bytes per instruction, one row per pass.
void video_copy_forward(int x_pos, int y_pos) {
    YUVPixel *Y_src, *U_src, *V_src;

    Y_src = Y_forw + 16*x_pos + y_pitch*(16*y_pos);
    U_src = U_forw +  8*x_pos + uv_pitch*( 8*y_pos);
    V_src = V_forw +  8*x_pos + uv_pitch*( 8*y_pos);

    // luminance

    if (MMX_enabled)
        __asm {
            push    esi
            push    edi

            mov     esi,y_pitch
            mov     ecx,Y_src
            mov     edx,dstY
            mov     edi,8
loop_Y_MMX:
            movq    mm0,[ecx]
            movq    mm1,[ecx+8]
            movq    [edx],mm0
            movq    mm2,[ecx+esi]
            movq    [edx+8],mm1
            movq    mm3,[ecx+esi+8]
            movq    [edx+esi],mm2
            movq    [edx+esi+8],mm3
            lea     ecx,[ecx+esi*2]
            dec     edi
            lea     edx,[edx+esi*2]
            jne     loop_Y_MMX

            mov     ecx,U_src
            mov     edx,dstU
            mov     esi,uv_pitch
            mov     edi,4
loop_U_MMX:
            movq    mm0,[ecx]
            movq    mm1,[ecx+esi]
            movq    [edx],mm0
            movq    [edx+esi],mm1
            lea     ecx,[ecx+esi*2]
            dec     edi
            lea     edx,[edx+esi*2]
            jne     loop_U_MMX

            mov     ecx,V_src
            mov     edx,dstV
// NOTE(review): continues the MMX path of video_copy_forward (V plane copy).
            mov     edi,4
loop_V_MMX:
            movq    mm0,[ecx]
            movq    mm1,[ecx+esi]
            movq    [edx],mm0
            movq    [edx+esi],mm1
            lea     ecx,[ecx+esi*2]
            dec     edi
            lea     edx,[edx+esi*2]
            jne     loop_V_MMX

            pop     edi
            pop     esi
        }
    else
        // Non-MMX path: 16 luminance rows of 16 bytes, then 8 rows of
        // 8 bytes for each chrominance plane, 4 bytes per move.
        __asm {
            push    esi
            push    edi

            mov     esi,y_pitch
            mov     ecx,Y_src
            mov     edx,dstY
            mov     edi,16
loop_Y:
            mov     eax,[ecx]
            mov     ebx,[ecx+4]
            mov     [edx],eax
            mov     [edx+4],ebx
            mov     eax,[ecx+8]
            mov     ebx,[ecx+12]
            mov     [edx+8],eax
            mov     [edx+12],ebx
            add     ecx,esi
            add     edx,esi
            dec     edi
            jne     loop_Y

            mov     ecx,U_src
            mov     edx,dstU
            mov     esi,uv_pitch
            mov     edi,8
loop_U:
            mov     eax,[ecx]
            mov     ebx,[ecx+4]
            mov     [edx],eax
            mov     [edx+4],ebx
            add     ecx,esi
            add     edx,esi
            dec     edi
            jne     loop_U

            mov     ecx,V_src
            mov     edx,dstV
            mov     edi,8
loop_V:
            mov     eax,[ecx]
            mov     ebx,[ecx+4]
            mov     [edx],eax
            mov     [edx+4],ebx
            add     ecx,esi
            add     edx,esi
            dec     edi
            jne     loop_V

            pop     edi
            pop     esi
        }
}

#pragma warning(pop)

// Writes the forward-predicted macroblock for (x_pos, y_pos) to the current
// destination pointers.
//
// fchrom selects the plane set: false = luminance only, true = both
// chrominance planes.  The current forward vector is doubled when
// forw_vector_full_pel is set, then added to the macroblock origin
// (32 units per macroblock for luminance, 16 for chrominance with the vector
// halved).  If the luminance position is negative or exceeds
// vector_limit_x/vector_limit_y, all coordinates are replaced by 0.
static void video_copy_forward_prediction(int x_pos, int y_pos, bool fchrom) {
    long vx = forw_vector_x;
    long vy = forw_vector_y;
    long vxY, vyY, vxC, vyC;

    if (forw_vector_full_pel) {
        vx <<= 1;
        vy <<= 1;
    }

    vxY = vx + 32*x_pos;
    vyY = vy + 32*y_pos;
    vxC = vx/2 + 16*x_pos;
    vyC = vy/2 + 16*y_pos;

    // Out-of-range vectors fall back to coordinate (0,0).
    if (vxY<0 || vyY<0 || vxY>vector_limit_x || vyY>vector_limit_y)
        vxY = vyY = vxC = vyC = 0;

    if (!fchrom)
        video_copy_prediction_Y(Y_forw, dstY, vxY, vyY, y_pitch);
    else {
        video_copy_prediction_C(U_forw, dstU, vxC, vyC, uv_pitch);
        video_copy_prediction_C(V_forw, dstV, vxC, vyC, uv_pitch);
    }
}

// Same as video_copy_forward_prediction, but using the backward vector,
// back_vector_full_pel and the backward reference planes.
static void video_copy_backward_prediction(int x_pos, int y_pos, bool fchrom) {
    long vx = back_vector_x;
    long vy = back_vector_y;
    long vxY, vyY, vxC, vyC;

    if (back_vector_full_pel) {
        vx <<= 1;
        vy <<= 1;
    }

    vxY = vx + 32*x_pos;
    vyY = vy + 32*y_pos;
    vxC = vx/2 + 16*x_pos;
    vyC = vy/2 + 16*y_pos;

    if (vxY<0 || vyY<0 || vxY>vector_limit_x || vyY>vector_limit_y)
        vxY = vyY = vxC = vyC = 0;

    if (!fchrom)
        video_copy_prediction_Y(Y_back, dstY, vxY, vyY, y_pitch);
    else {
        video_copy_prediction_C(U_back, dstU, vxC, vyC, uv_pitch);
        video_copy_prediction_C(V_back, dstV, vxC, vyC, uv_pitch);
    }
}

// Same coordinate computation as video_copy_backward_prediction, but the
// backward prediction is handed to the video_add_prediction_* routines, which
// combine it with what is already at the destination instead of overwriting.
static void video_add_backward_prediction(int x_pos, int y_pos, bool fchrom) {
    long vx = back_vector_x;
    long vy = back_vector_y;
    long vxY, vyY, vxC, vyC;

    if (back_vector_full_pel) {
        vx <<= 1;
        vy <<= 1;
    }

    vxY = vx + 32*x_pos;
    vyY = vy + 32*y_pos;
    vxC = vx/2 + 16*x_pos;
    vyC = vy/2 + 16*y_pos;

    if (vxY<0 || vyY<0 || vxY>vector_limit_x || vyY>vector_limit_y)
        vxY = vyY = vxC = vyC = 0;

    if (!fchrom)
        video_add_prediction_Y(Y_back, dstY, vxY, vyY, y_pitch);
    else {
        video_add_prediction_C(U_back, dstU, vxC, vyC, uv_pitch);
        video_add_prediction_C(V_back, dstV, vxC, vyC, uv_pitch);
    }
}

///////////////////////////////////////////////////////////////////////////

// Everything below is compiled out by "#if 0".  It is a debug-only self test
// that runs the prediction copy/add routines over random data for every
// vector in 0..15 x 0..15 and compares the result against a straightforward
// rounded average, once per CPU extension setting.

#if 0

#ifdef _DEBUG
class MPEGDecoderVerifier {
private:
    YUVPixel src[32][32], dst[32][32], src2[32][32];
    int err[2][2][2];       // summed error indexed [luma/chroma][vy&1][vx&1]

    void rnd();
    int checkpred(int vx, int vy, int w, int h, bool);

public:
    MPEGDecoderVerifier();
} g_VerifyMPEGDecoder;

// Fills src with values from rand().
void MPEGDecoderVerifier::rnd() {
    int i,j;

    for(j=0; j<32; j++) {
        for(i=0; i<32; i++) {
            src[j][i] = (YUVPixel)rand();
        }
    }
}

// Compares dst against the expected prediction for vector (vx, vy) over a
// w x h area and returns the summed absolute error; throws MyError if any
// single pixel differs by more than 1.  With add set, the expectation also
// averages in src2.
int MPEGDecoderVerifier::checkpred(int vx, int vy, int w, int h, bool add) {
    YUVPixel p1, p2, p3, p4;
    int r;
    int i, j;
    int e, sum=0;

    for(j=0; j<h; j++)
        for(i=0; i<w; i++) {
            p1 = src[j+((vy+0)>>1)][i+((vx+0)>>1)];
            p2 = src[j+((vy+0)>>1)][i+((vx+1)>>1)];
            p3 = src[j+((vy+1)>>1)][i+((vx+0)>>1)];
            p4 = src[j+((vy+1)>>1)][i+((vx+1)>>1)];

            if (add)
                r = ((int)p1 + (int)p2 + (int)p3 + (int)p4 + 4*src2[j][i] + 4)/8;
            else
                r = ((int)p1 + (int)p2 + (int)p3 + (int)p4 + 2)/4;

            e = abs(r - (int)dst[j][i]);

            if (e>1)
                throw MyError("Predictor verify error in MPEG decoder with vector %d,%d!", vx, vy);

            sum += e;
        }

    return sum;
}

// Runs the copy and add tests, reporting average errors through _RPT2, and
// repeats for each CPU extension setting until both MMX and ISSE are off.
MPEGDecoderVerifier::MPEGDecoderVerifier() {
    try {
        int i,j;

        for(j=0; j<32; j++) {
            for(i=0; i<32; i++) {
                src[j][i] = (YUVPixel)rand();
                src2[j][i] = (YUVPixel)rand();
            }
        }

        CPUCheckForExtensions();
        CPUEnableExtensions(0);

        do {
            _RPT2(0,"MPEG-1 decoder: testing prediction copy (MMX %s / ISSE %s)\n", MMX_enabled ? "on" : "off", ISSE_enabled ? "on" : "off");

            memset(err, 0, sizeof err);

            for(j=0; j<16; j++) {
                for(i=0; i<16; i++) {
                    video_copy_prediction_Y(&src[0][0], &dst[0][0], i, j, 32);
                    err[0][j&1][i&1] += checkpred(i, j, 16, 16, false);

                    video_copy_prediction_C(&src[0][0], &dst[0][0], i, j, 32);
                    err[1][j&1][i&1] += checkpred(i, j, 8, 8, false);
                }
            }

            if (MMX_enabled)
                __asm emms

            _RPT2(0,"full/full: average error %.4lf, %.4lf\n", (double)err[0][0][0] / (16.0*16.0*256.0), (double)err[1][0][0] / (16.0*16.0*64.0));
            _RPT2(0,"half/full: average error %.4lf, %.4lf\n", (double)err[0][0][1] / (16.0*16.0*256.0), (double)err[1][0][1] / (16.0*16.0*64.0));
            _RPT2(0,"full/half: average error %.4lf, %.4lf\n", (double)err[0][1][0] / (16.0*16.0*256.0), (double)err[1][1][0] / (16.0*16.0*64.0));
            _RPT2(0,"half/half: average error %.4lf, %.4lf\n", (double)err[0][1][1] / (16.0*16.0*256.0), (double)err[1][1][1] / (16.0*16.0*64.0));

            _RPT2(0,"MPEG-1 decoder: testing prediction add (MMX %s / ISSE %s)\n", MMX_enabled ? "on" : "off", ISSE_enabled ? "on" : "off");

            memset(err, 0, sizeof err);

            for(j=0; j<16; j++) {
                for(i=0; i<16; i++) {
                    memcpy(dst, src2, sizeof src2);
                    video_add_prediction_Y(&src[0][0], &dst[0][0], i, j, 32);
                    err[0][j&1][i&1] += checkpred(i, j, 16, 16, true);

                    memcpy(dst, src2, sizeof src2);
                    video_add_prediction_C(&src[0][0], &dst[0][0], i, j, 32);
                    err[1][j&1][i&1] += checkpred(i, j, 8, 8, true);
                }
            }

            if (MMX_enabled)
                __asm emms

            _RPT2(0,"full/full: average error %.4lf, %.4lf\n", (double)err[0][0][0] / (16.0*16.0*256.0), (double)err[1][0][0] / (16.0*16.0*64.0));
            _RPT2(0,"half/full: average error %.4lf, %.4lf\n", (double)err[0][0][1] / (16.0*16.0*256.0), (double)err[1][0][1] / (16.0*16.0*64.0));
            _RPT2(0,"full/half: average error %.4lf, %.4lf\n", (double)err[0][1][0] / (16.0*16.0*256.0), (double)err[1][1][0] / (16.0*16.0*64.0));
            _RPT2(0,"half/half: average error %.4lf, %.4lf\n", (double)err[0][1][1] / (16.0*16.0*256.0), (double)err[1][1][1] / (16.0*16.0*64.0));

            // Next setting: none -> MMX -> MMX+ISSE -> none (loop then ends).
            CPUEnableExtensions(MMX_enabled ? ISSE_enabled ? 0 : CPUF_SUPPORTS_MMX|CPUF_SUPPORTS_INTEGER_SSE : CPUF_SUPPORTS_MMX);
        } while(MMX_enabled || ISSE_enabled);

    } catch(MyError e) {
        e.post(NULL, "MPEG-1 decoder verification error");
    }
}
#endif

#endif